pacman::p_load(ggplot2, dplyr, caret, tidyr,arules, arulesViz, RColorBrewer, caTools )
# install.packages("arules")
# install.packages("arulesViz")
# library("arules")
# library("arulesViz")
# Load the dataset as an arules transactions object ('basket' format: one
# transaction per line, items separated by commas).
# NOTE(review): the inspect() output recorded further down lists a first
# transaction `{items}`, which looks like a header row read as data. Confirm
# whether the file starts with a header line and, if so, whether it should be
# skipped (e.g. header = TRUE or skip = 1); doing so would change every count
# recorded in this script.
tr <- read.transactions("~/Documents/@/UBIQUM/DATAML/Week7/Market_Basket_Analysis/MarketBasketAnalysis/DATASETS/ElectronidexTransactions.csv", format = 'basket', sep=',', header = FALSE)
# Explicit conversion to a transactions object (disabled; `tr` is used as
# returned by read.transactions() throughout the script).
#trObj<-as(tr,"transactions")
# Overview of the transaction data
length(tr)  # number of transactions
## [1] 10454
print(tr)
## transactions in sparse format with
## 10454 transactions (rows) and
## 4248 items (columns)
summary(tr)
## transactions as itemMatrix in sparse format with
## 10454 rows (elements/itemsets/transactions) and
## 4248 columns (items) and a density of 0.0005853147
##
## most frequent items:
## APP0692 APP1184 SAM0068 APP1208 WDT0177 (Other)
## 290 283 214 210 185 24811
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 13
## 2 7197 2138 700 260 89 30 18 12 3 4 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 2.000 2.486 3.000 13.000
##
## includes extended item information - examples:
## labels
## 1 8MO0001
## 2 8MO0002
## 3 8MO0003
# Extract the items component of the transactions; the recorded output below
# shows it is an itemMatrix with the same rows and columns as `tr`.
items(tr)
## itemMatrix in sparse format with
## 10454 rows (elements/transactions) and
## 4248 columns (items)
# Visualize the data
# Item frequency plot with default settings (no topN limit is passed here).
itemFrequencyPlot(tr)

# Not working (presumably refers to the image() calls below):
#image(tr[1:500])
#image(sample(tr, 2000))
# Are there any duplicates?
#duplicated(tr)
# Show the first 100 transactions
inspect(tr[seq_len(100)])
## items
## [1] {items}
## [2] {IFX0014,IFX0049,MOP0083,TUC0302}
## [3] {APP1130,CRU0045,OWC0048,SEA0038}
## [4] {APP0405,APP1208}
## [5] {APP0432,APP1457,APP2487,APP2552,BOS0059}
## [6] {APP1208,APP1459,ELA0004,LAC0221,LGE0038}
## [7] {APP1916,APP2498}
## [8] {ALL0004,APP2523,LAC0166,SAN0150,WOE0002}
## [9] {BEZ0209,SAT0024}
## [10] {APP1037,LAC0176,SAT0003}
## [11] {EVU0003,SAN0039}
## [12] {CAD0005,PAC2070}
## [13] {SEA0033,SEA0043}
## [14] {APP2481,APP2513}
## [15] {APP1859,OWC0216,OWC0226}
## [16] {OWC0035-2,OWC0036}
## [17] {BEL0183,ELA0021}
## [18] {FCM0010,SAN0106}
## [19] {KIN0144,LAC0225}
## [20] {PHI0063,WDT0135}
## [21] {AP20299,OWC0002}
## [22] {LEX0010,PAC1494}
## [23] {PHI0057,PHI0066}
## [24] {BEL0263,LAC0206}
## [25] {BEL0223,KIN0115,MOS0059,MOS0148}
## [26] {APP2477,IOT0012,OTT0156,XDO0033}
## [27] {APP0696,NTE0015}
## [28] {APP0431,IOT0008,IOT0020}
## [29] {APP0692,BNQ0036-A,SAN0116-A}
## [30] {APP1565,LAC0176,OWC0184}
## [31] {APP0656,CAD0005,PAC2105}
## [32] {OWC0001,SEA0096}
## [33] {APP2323,SAT0023}
## [34] {APP1204,KIN0153-2}
## [35] {SAN0139,STA0043,STA0046}
## [36] {APP0660,APP1565,IFX0131,IFX0163}
## [37] {OWC0165,PAC1493}
## [38] {GRT0394,THU0016}
## [39] {APP1459,MOS0176}
## [40] {NTE0007,NTE0020,OWC0094,PAC0748}
## [41] {APP2058,LMP0021}
## [42] {BEL0236,GRT0438}
## [43] {MOX0020,SAN0093}
## [44] {MUV0162,XOO0005,XOO0006}
## [45] {IFX0010,OWC0036,SAM0068,SEA0097}
## [46] {APP0660,APP1566}
## [47] {APP0656,PAC1588}
## [48] {KIN0150,NTE0007,OWC0094,SAM0069}
## [49] {SYN0133,WDT0177}
## [50] {APP0404,BEL0223,DLK0072,LAC0199,PAC0486}
## [51] {APP0017,APP1208}
## [52] {SYN0140,WDT0177}
## [53] {SYN0121,WDT0177}
## [54] {WAC0034,WAC0185}
## [55] {APP0695,SPE0155}
## [56] {KEN0206,SAN0106}
## [57] {SAN0084,THU0019}
## [58] {PHI0066,PHI0073}
## [59] {GRT0355,OTT0133}
## [60] {APP0692,GRT0369}
## [61] {LIF0089,SAT0008}
## [62] {PHI0070,SAT0044}
## [63] {SAN0093,SEA0100}
## [64] {OWC0018,OWC0086,SAM0068}
## [65] {IFX0015,IFX0039,IFX0073}
## [66] {SAM0067,SYN0121}
## [67] {IFX0039,TRK0003}
## [68] {SYN0122,WDT0135}
## [69] {IFX0144,NTE0038,OWC0040-2,PAC1498}
## [70] {SYN0180,WDT0177}
## [71] {APP1639,APP1641}
## [72] {APP1205,APP1669}
## [73] {MOX0024,SAT0015}
## [74] {OWC0001,OWC0037-2,SAM0063,SEA0096}
## [75] {KAN0021,NTE0006}
## [76] {PHI0055,WOE0005,WOE0006}
## [77] {ELG0032,ELG0034}
## [78] {HTE0002,TIG0018}
## [79] {BEL0165,TPL0032}
## [80] {APP0921,MOS0059,MUV0145}
## [81] {ELG0042,PAC1394,SAT0005,SAT0028,SAT0037}
## [82] {MOX0009,SPE0162}
## [83] {APP0692,OTT0130,SAT0017}
## [84] {HTE0003,WIT0024}
## [85] {IFX0014,TRA0004}
## [86] {OWC0181-2,PAC0617}
## [87] {APP0921,TPL0032}
## [88] {LIF0070,LIF0072,LIF0099}
## [89] {BEL0292,MOS0204}
## [90] {GRT0421,IFX0013,MOX0012}
## [91] {SAN0127,SAN0128}
## [92] {PAC1729,TOS0014}
## [93] {KAN0022,OWC0036-2}
## [94] {APP1146,WOE0002}
## [95] {APP1041,APP1915,WIT0026}
## [96] {APP0921,BEL0206,SPE0135}
## [97] {HTE0004,PAR0044,TAM0006}
## [98] {HTE0001,HTE0003}
## [99] {BEL0280,MUV0162,OTT0153}
## [100] {OWC0142,SEA0038}
# Item frequency (absolute): raw counts of the 20 most frequent items,
# each item counted independently.
itemFrequencyPlot(
  tr,
  topN = 20,
  type = "absolute",
  col = brewer.pal(8, "Pastel2"),
  main = "Absolute Item Frequency Plot"
)

# Item frequency (relative): how often the 20 most frequent items appear
# compared to the others. APP0692 and APP1184 have the most sales.
itemFrequencyPlot(
  tr,
  topN = 20,
  type = "relative",
  col = brewer.pal(8, "Pastel2"),
  main = "Relative Item Frequency Plot"
)

# Evaluating model performance
# Settings noted: support 0.001 / confidence 0.8 and support 0.0005 /
# confidence 0.8. The call below uses minimum support 0.0005 and minimum
# confidence 0.8.
associationrules <- apriori(
  tr,
  parameter = list(support = 0.0005, confidence = 0.8)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 5e-04 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 5
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4248 item(s), 10454 transaction(s)] done [0.01s].
## sorting and recoding items ... [1125 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [6 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# List every rule mined at support 0.0005 / confidence 0.8 with its quality
# measures (support, confidence, lift, count).
inspect(associationrules)
## lhs rhs support confidence lift count
## [1] {OWC0056} => {OWC0054} 0.0005739430 1.0000000 871.16667 6
## [2] {WAC0156} => {WAC0158} 0.0006696002 1.0000000 1493.42857 7
## [3] {WAC0158} => {WAC0156} 0.0006696002 1.0000000 1493.42857 7
## [4] {PRY0004} => {PRY0003} 0.0005739430 1.0000000 614.94118 6
## [5] {NES0009} => {NES0006} 0.0006696002 1.0000000 1045.40000 7
## [6] {APP1803} => {APP1215} 0.0016261718 0.8947368 65.40964 17
# Summary of the rule set: 6 rules, all of length 2 (lhs + rhs).
summary(associationrules) # 6 rules.: 6 of two products
## set of 6 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 6
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift count
## Min. :0.0005739 Min. :0.8947 Min. : 65.41 Min. : 6.000
## 1st Qu.:0.0005979 1st Qu.:1.0000 1st Qu.: 679.00 1st Qu.: 6.250
## Median :0.0006696 Median :1.0000 Median : 958.28 Median : 7.000
## Mean :0.0007971 Mean :0.9825 Mean : 930.63 Mean : 8.333
## 3rd Qu.:0.0006696 3rd Qu.:1.0000 3rd Qu.:1381.42 3rd Qu.: 7.000
## Max. :0.0016262 Max. :1.0000 Max. :1493.43 Max. :17.000
##
## mining info:
## data ntransactions support confidence
## tr 10454 5e-04 0.8
# Same minimum support (0.0005), lower minimum confidence (0.5)
associationrules1 <- apriori(
  tr,
  parameter = list(support = 0.0005, confidence = 0.5)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 5e-04 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 5
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4248 item(s), 10454 transaction(s)] done [0.01s].
## sorting and recoding items ... [1125 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [48 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# List every rule mined at support 0.0005 / confidence 0.5 with its quality
# measures.
inspect(associationrules1)
## lhs rhs support confidence lift
## [1] {OWC0056} => {OWC0054} 0.0005739430 1.0000000 871.16667
## [2] {OWC0054} => {OWC0056} 0.0005739430 0.5000000 871.16667
## [3] {WAC0156} => {WAC0158} 0.0006696002 1.0000000 1493.42857
## [4] {WAC0158} => {WAC0156} 0.0006696002 1.0000000 1493.42857
## [5] {PRY0004} => {PRY0003} 0.0005739430 1.0000000 614.94118
## [6] {NES0009} => {NES0006} 0.0006696002 1.0000000 1045.40000
## [7] {NES0006} => {NES0009} 0.0006696002 0.7000000 1045.40000
## [8] {PAC2115} => {CAD0005} 0.0005739430 0.7500000 186.67857
## [9] {APP2125} => {APP1215} 0.0005739430 0.7500000 54.82867
## [10] {QNA0149} => {WDT0177} 0.0005739430 0.6666667 37.67207
## [11] {SYN0174} => {WDT0177} 0.0005739430 0.5454545 30.82260
## [12] {APP2114} => {APP1215} 0.0005739430 0.6000000 43.86294
## [13] {APP1575} => {LIBRO} 0.0007652573 0.6666667 142.23129
## [14] {APP1623} => {APP1215} 0.0005739430 0.6666667 48.73660
## [15] {PAC2154} => {APP0017} 0.0005739430 0.6000000 101.16774
## [16] {SSE0005} => {SSE0004} 0.0006696002 0.6363636 604.77686
## [17] {SSE0004} => {SSE0005} 0.0006696002 0.6363636 604.77686
## [18] {NEA0011} => {NEA0004} 0.0005739430 0.7500000 871.16667
## [19] {NEA0004} => {NEA0011} 0.0005739430 0.6666667 871.16667
## [20] {APP2113} => {APP1215} 0.0006696002 0.7777778 56.85936
## [21] {SPH0014} => {SPH0016} 0.0005739430 0.5454545 335.42246
## [22] {SPH0014} => {SPH0015} 0.0006696002 0.6363636 332.62727
## [23] {IFX0087} => {IFX0028} 0.0007652573 0.7272727 330.56126
## [24] {ALL0011} => {ALL0002} 0.0006696002 0.5833333 203.27222
## [25] {PHI0054} => {PHI0070} 0.0005739430 0.5454545 91.97067
## [26] {SNS0019} => {SNS0014} 0.0007652573 0.5000000 193.59259
## [27] {APP2118} => {APP1215} 0.0008609145 0.6428571 46.99600
## [28] {SPH0016} => {SPH0015} 0.0010522288 0.6470588 338.21765
## [29] {SPH0015} => {SPH0016} 0.0010522288 0.5500000 338.21765
## [30] {APP2486} => {APP1184} 0.0008609145 0.6923077 25.57380
## [31] {QNA0210} => {WDT0177} 0.0011478860 0.5714286 32.29035
## [32] {APP2480} => {APP1184} 0.0007652573 0.5000000 18.46996
## [33] {SYN0180} => {WDT0177} 0.0012435431 0.5652174 31.93937
## [34] {OWC0235-2} => {NTE0007} 0.0006696002 0.5000000 29.20112
## [35] {OWC0104} => {NTE0007} 0.0009565716 0.6250000 36.50140
## [36] {APP0428} => {APP0432} 0.0009565716 0.5000000 121.55814
## [37] {APP2142} => {APP1215} 0.0011478860 0.5454545 39.87540
## [38] {PHI0061} => {PHI0062} 0.0009565716 0.5882353 323.65325
## [39] {PHI0062} => {PHI0061} 0.0009565716 0.5263158 323.65325
## [40] {SYN0181} => {WDT0177} 0.0013392003 0.5000000 28.25405
## [41] {APP1040} => {APP1565} 0.0009565716 0.5555556 148.91738
## [42] {APP2485} => {APP1184} 0.0010522288 0.5500000 20.31696
## [43] {APP2117} => {APP1215} 0.0015305146 0.6666667 48.73660
## [44] {APP1914} => {APP1041} 0.0011478860 0.5217391 165.28063
## [45] {APP1803} => {APP1215} 0.0016261718 0.8947368 65.40964
## [46] {APP2155} => {APP1215} 0.0025827434 0.5510204 40.28229
## [47] {APP0979} => {APP0692} 0.0044958867 0.6103896 22.00349
## [48] {APP2117,APP2155} => {APP1215} 0.0005739430 0.5454545 39.87540
## count
## [1] 6
## [2] 6
## [3] 7
## [4] 7
## [5] 6
## [6] 7
## [7] 7
## [8] 6
## [9] 6
## [10] 6
## [11] 6
## [12] 6
## [13] 8
## [14] 6
## [15] 6
## [16] 7
## [17] 7
## [18] 6
## [19] 6
## [20] 7
## [21] 6
## [22] 7
## [23] 8
## [24] 7
## [25] 6
## [26] 8
## [27] 9
## [28] 11
## [29] 11
## [30] 9
## [31] 12
## [32] 8
## [33] 13
## [34] 7
## [35] 10
## [36] 10
## [37] 12
## [38] 10
## [39] 10
## [40] 14
## [41] 10
## [42] 11
## [43] 16
## [44] 12
## [45] 17
## [46] 27
## [47] 47
## [48] 6
# Inspecting only the first six rules of the confidence-0.8 set (disabled):
#inspect(associationrules[1:6])
# Summary of the confidence-0.5 rule set; the rule length distribution in the
# recorded output gives 47 rules of length 2 and 1 rule of length 3.
summary(associationrules1) # 48 rules: 47 rules with two products, 1 with three
## set of 48 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3
## 47 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.000 2.021 2.000 3.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.0005739 Min. :0.5000 Min. : 18.47 Min. : 6.000
## 1st Qu.:0.0005739 1st Qu.:0.5455 1st Qu.: 39.88 1st Qu.: 6.000
## Median :0.0006696 Median :0.6177 Median : 131.89 Median : 7.000
## Mean :0.0009227 Mean :0.6521 Mean : 309.63 Mean : 9.646
## 3rd Qu.:0.0009805 3rd Qu.:0.6942 3rd Qu.: 338.22 3rd Qu.:10.250
## Max. :0.0044959 Max. :1.0000 Max. :1493.43 Max. :47.000
##
## mining info:
## data ntransactions support confidence
## tr 10454 5e-04 0.5
#set different Confidences values
# associationrules2 <- apriori(tr, parameter = list(supp=0.0005, conf=0.6))
# inspect(associationrules2)
# associationrules3 <- apriori(tr, parameter = list(supp=0.0005, conf=0.7))
# inspect(associationrules3)
# associationrules4 <- apriori(tr, parameter = list(supp=0.0005, conf=0.9))
# inspect(associationrules4)
# associationrules5 <- apriori(tr, parameter = list(supp=0.0005, conf=1))
# inspect(associationrules5)
#set different Support values
# associationrulesS1 <- apriori(tr, parameter = list(supp=0.0001, conf=0.6))
# associationrulesS2 <- apriori(tr, parameter = list(supp=0.0003, conf=0.6))
# associationrulesS3 <- apriori(tr, parameter = list(supp=0.0005, conf=0.6))
# associationrulesS4 <- apriori(tr, parameter = list(supp=0.0005, conf=0.6))
# associationrulesS5 <- apriori(tr, parameter = list(supp=0.0007, conf=0.6))
# associationrulesS6 <- apriori(tr, parameter = list(supp=0.001, conf=0.6))
# associationrulesS7 <- apriori(tr, parameter = list(supp=0.00001, conf=0.6))
# inspect(associationrulesS7[1:100])
# summary(associationrulesS7)
# associationrulesS8 <- apriori(tr, parameter = list(supp=0.00009, conf=0.6))
# inspect(associationrulesS8[1:100])
# summary(associationrulesS8)
# Minimum support 0.0003, minimum confidence 0.6 => 63 rules
associationrulesS2 <- apriori(
  tr,
  parameter = list(support = 0.0003, confidence = 0.6)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 3e-04 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 3
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4248 item(s), 10454 transaction(s)] done [0.01s].
## sorting and recoding items ... [1596 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.01s].
## writing ... [63 rule(s)] done [0.01s].
## creating S4 object ... done [0.00s].
# Summary of the rule set mined at support 0.0003 / confidence 0.6: rule
# length distribution and quality measure ranges.
summary(associationrulesS2)
## set of 63 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3
## 53 10
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.000 2.159 2.000 3.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.0003826 Min. :0.6000 Min. : 22.00 Min. : 4.000
## 1st Qu.:0.0003826 1st Qu.:0.6667 1st Qu.: 55.07 1st Qu.: 4.000
## Median :0.0004783 Median :0.7143 Median : 186.68 Median : 5.000
## Mean :0.0006195 Mean :0.7664 Mean : 422.32 Mean : 6.476
## 3rd Qu.:0.0006696 3rd Qu.:0.8167 3rd Qu.: 805.35 3rd Qu.: 7.000
## Max. :0.0044959 Max. :1.0000 Max. :1493.43 Max. :47.000
##
## mining info:
## data ntransactions support confidence
## tr 10454 3e-04 0.6
# First 10 rules in the order apriori() returned them
inspect(associationrulesS2[seq_len(10)])
## lhs rhs support confidence lift count
## [1] {PRY0006} => {PRY0003} 0.0003826287 1.0 614.9412 4
## [2] {POL0007} => {POL0010} 0.0003826287 1.0 1161.5556 4
## [3] {REP0233} => {REP0232} 0.0003826287 0.8 1194.7429 4
## [4] {SNS0021} => {SNS0014} 0.0003826287 0.8 309.7481 4
## [5] {APP2158} => {APP1215} 0.0003826287 1.0 73.1049 4
## [6] {IFX0047} => {IFX0036} 0.0003826287 1.0 197.2453 4
## [7] {OWC0056} => {OWC0054} 0.0005739430 1.0 871.1667 6
## [8] {OWC0058} => {OWC0057} 0.0003826287 0.8 836.3200 4
## [9] {POL0008} => {POL0010} 0.0004782858 1.0 1161.5556 5
## [10] {AKI0015} => {APP1913} 0.0003826287 0.8 66.3746 4
# Inspect results
# Top 25 rules by lift (highest first). head(..., by = ) replaces
# sort(...)[1:25], matching the head(..., by = ) call used for the top-10 rules
# at the end of this script and avoiding a hard-coded index range that assumes
# at least 25 rules were mined.
inspect(head(associationrulesS2, n = 25, by = "lift"))
## lhs rhs support confidence lift
## [1] {WAC0156} => {WAC0158} 0.0006696002 1.0000000 1493.4286
## [2] {WAC0158} => {WAC0156} 0.0006696002 1.0000000 1493.4286
## [3] {NEA0012} => {NEA0008} 0.0003826287 1.0000000 1306.7500
## [4] {REP0233} => {REP0232} 0.0003826287 0.8000000 1194.7429
## [5] {NTE0067} => {NTE0068} 0.0003826287 0.8000000 1194.7429
## [6] {POL0007} => {POL0010} 0.0003826287 1.0000000 1161.5556
## [7] {POL0008} => {POL0010} 0.0004782858 1.0000000 1161.5556
## [8] {NES0009} => {NES0006} 0.0006696002 1.0000000 1045.4000
## [9] {NES0006} => {NES0009} 0.0006696002 0.7000000 1045.4000
## [10] {IHE0020} => {IHE0017} 0.0004782858 0.8333333 967.9630
## [11] {IHE0019} => {IHE0017} 0.0003826287 0.8000000 929.2444
## [12] {OWC0056} => {OWC0054} 0.0005739430 1.0000000 871.1667
## [13] {NEA0009} => {NEA0011} 0.0003826287 0.6666667 871.1667
## [14] {NEA0011} => {NEA0004} 0.0005739430 0.7500000 871.1667
## [15] {NEA0004} => {NEA0011} 0.0005739430 0.6666667 871.1667
## [16] {OWC0058} => {OWC0057} 0.0003826287 0.8000000 836.3200
## [17] {NEA0009} => {NEA0004} 0.0003826287 0.6666667 774.3704
## [18] {NEA0001} => {NEA0004} 0.0004782858 0.6250000 725.9722
## [19] {PRY0006} => {PRY0003} 0.0003826287 1.0000000 614.9412
## [20] {PRY0004} => {PRY0003} 0.0005739430 1.0000000 614.9412
## [21] {SSE0005} => {SSE0004} 0.0006696002 0.6363636 604.7769
## [22] {SSE0004} => {SSE0005} 0.0006696002 0.6363636 604.7769
## [23] {SNS0010} => {SNS0019} 0.0004782858 0.7142857 466.6964
## [24] {APP0657,SAT0048} => {MMW0010} 0.0004782858 0.7142857 439.2437
## [25] {ELA0019} => {ELA0016} 0.0004782858 0.6250000 362.9861
## count
## [1] 7
## [2] 7
## [3] 4
## [4] 4
## [5] 4
## [6] 4
## [7] 5
## [8] 7
## [9] 7
## [10] 5
## [11] 4
## [12] 6
## [13] 4
## [14] 6
## [15] 6
## [16] 4
## [17] 4
## [18] 5
## [19] 4
## [20] 6
## [21] 7
## [22] 7
## [23] 5
## [24] 5
## [25] 5
# Top 25 rules by support (highest first). head(..., by = ) replaces
# sort(...)[1:25] so the call does not depend on a hard-coded index range that
# assumes at least 25 rules were mined.
inspect(head(associationrulesS2, n = 25, by = "support"))
## lhs rhs support confidence lift count
## [1] {APP0979} => {APP0692} 0.0044958867 0.6103896 22.00349 47
## [2] {APP1803} => {APP1215} 0.0016261718 0.8947368 65.40964 17
## [3] {APP2117} => {APP1215} 0.0015305146 0.6666667 48.73660 16
## [4] {SPH0016} => {SPH0015} 0.0010522288 0.6470588 338.21765 11
## [5] {OWC0104} => {NTE0007} 0.0009565716 0.6250000 36.50140 10
## [6] {APP2118} => {APP1215} 0.0008609145 0.6428571 46.99600 9
## [7] {APP2486} => {APP1184} 0.0008609145 0.6923077 25.57380 9
## [8] {APP1575} => {LIBRO} 0.0007652573 0.6666667 142.23129 8
## [9] {IFX0087} => {IFX0028} 0.0007652573 0.7272727 330.56126 8
## [10] {WAC0156} => {WAC0158} 0.0006696002 1.0000000 1493.42857 7
## [11] {WAC0158} => {WAC0156} 0.0006696002 1.0000000 1493.42857 7
## [12] {NES0009} => {NES0006} 0.0006696002 1.0000000 1045.40000 7
## [13] {NES0006} => {NES0009} 0.0006696002 0.7000000 1045.40000 7
## [14] {SSE0005} => {SSE0004} 0.0006696002 0.6363636 604.77686 7
## [15] {SSE0004} => {SSE0005} 0.0006696002 0.6363636 604.77686 7
## [16] {SPH0014} => {SPH0015} 0.0006696002 0.6363636 332.62727 7
## [17] {APP2113} => {APP1215} 0.0006696002 0.7777778 56.85936 7
## [18] {OWC0056} => {OWC0054} 0.0005739430 1.0000000 871.16667 6
## [19] {PRY0004} => {PRY0003} 0.0005739430 1.0000000 614.94118 6
## [20] {PAC2115} => {CAD0005} 0.0005739430 0.7500000 186.67857 6
## [21] {APP2125} => {APP1215} 0.0005739430 0.7500000 54.82867 6
## [22] {QNA0149} => {WDT0177} 0.0005739430 0.6666667 37.67207 6
## [23] {APP2114} => {APP1215} 0.0005739430 0.6000000 43.86294 6
## [24] {APP1623} => {APP1215} 0.0005739430 0.6666667 48.73660 6
## [25] {PAC2154} => {APP0017} 0.0005739430 0.6000000 101.16774 6
# Top 25 rules by confidence (highest first). head(..., by = ) replaces
# sort(...)[1:25] so the call does not depend on a hard-coded index range that
# assumes at least 25 rules were mined.
inspect(head(associationrulesS2, n = 25, by = "confidence"))
## lhs rhs support confidence lift
## [1] {PRY0006} => {PRY0003} 0.0003826287 1.0000000 614.94118
## [2] {POL0007} => {POL0010} 0.0003826287 1.0000000 1161.55556
## [3] {APP2158} => {APP1215} 0.0003826287 1.0000000 73.10490
## [4] {IFX0047} => {IFX0036} 0.0003826287 1.0000000 197.24528
## [5] {OWC0056} => {OWC0054} 0.0005739430 1.0000000 871.16667
## [6] {POL0008} => {POL0010} 0.0004782858 1.0000000 1161.55556
## [7] {WAC0156} => {WAC0158} 0.0006696002 1.0000000 1493.42857
## [8] {WAC0158} => {WAC0156} 0.0006696002 1.0000000 1493.42857
## [9] {APP1495} => {APP1215} 0.0003826287 1.0000000 73.10490
## [10] {PRY0004} => {PRY0003} 0.0005739430 1.0000000 614.94118
## [11] {NES0009} => {NES0006} 0.0006696002 1.0000000 1045.40000
## [12] {NEA0012} => {NEA0008} 0.0003826287 1.0000000 1306.75000
## [13] {APP1803,APP2113} => {APP1215} 0.0003826287 1.0000000 73.10490
## [14] {APP1803} => {APP1215} 0.0016261718 0.8947368 65.40964
## [15] {IHE0020} => {IHE0017} 0.0004782858 0.8333333 967.96296
## [16] {CRU0047,OWC0147} => {NTE0007} 0.0004782858 0.8333333 48.66853
## [17] {REP0233} => {REP0232} 0.0003826287 0.8000000 1194.74286
## [18] {SNS0021} => {SNS0014} 0.0003826287 0.8000000 309.74815
## [19] {OWC0058} => {OWC0057} 0.0003826287 0.8000000 836.32000
## [20] {AKI0015} => {APP1913} 0.0003826287 0.8000000 66.37460
## [21] {APP2111} => {APP1215} 0.0003826287 0.8000000 58.48392
## [22] {NTE0067} => {NTE0068} 0.0003826287 0.8000000 1194.74286
## [23] {IHE0019} => {IHE0017} 0.0003826287 0.8000000 929.24444
## [24] {MIN0005,OTT0171} => {APP1184} 0.0003826287 0.8000000 29.55194
## [25] {APP1184,OTT0171} => {MIN0005} 0.0003826287 0.8000000 167.26400
## count
## [1] 4
## [2] 4
## [3] 4
## [4] 4
## [5] 6
## [6] 5
## [7] 7
## [8] 7
## [9] 4
## [10] 6
## [11] 7
## [12] 4
## [13] 4
## [14] 17
## [15] 5
## [16] 5
## [17] 4
## [18] 4
## [19] 4
## [20] 4
## [21] 4
## [22] 4
## [23] 4
## [24] 4
## [25] 4
# Improving the model
# Drop every rule whose item set (lhs + rhs) contains the item set of another
# rule in the same set. is.subset(x, x)[i, j] tells whether rule i is a subset
# of rule j, and every rule is a subset of itself, so a column sum above 1
# marks rule j as a superset of (or as having the same items as) another rule.
# NOTE(review): mirror-image rules such as {A} => {B} and {B} => {A} share one
# item set, so both appear to be flagged and dropped -- confirm this is wanted.
associationSubsetS2<- which(colSums(is.subset(associationrulesS2, associationrulesS2)) > 1) # get subset rules in vector
length(associationSubsetS2) #15
## [1] 15
# Keep the complement by positive index rather than `[-associationSubsetS2]`:
# a negative index built from an empty which() result selects nothing, which
# would discard every rule instead of keeping them all.
associationrulesNosubsets <- associationrulesS2[setdiff(seq_len(length(associationrulesS2)), associationSubsetS2)] # remove flagged rules.
summary(associationrulesNosubsets) #48 rules
## set of 48 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3
## 45 3
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 2.000 2.062 2.000 3.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.0003826 Min. :0.6000 Min. : 22.00 Min. : 4.000
## 1st Qu.:0.0003826 1st Qu.:0.6667 1st Qu.: 51.08 1st Qu.: 4.000
## Median :0.0004783 Median :0.7208 Median : 139.18 Median : 5.000
## Mean :0.0006437 Mean :0.7638 Mean : 362.07 Mean : 6.729
## 3rd Qu.:0.0005739 3rd Qu.:0.8083 3rd Qu.: 642.70 3rd Qu.: 6.000
## Max. :0.0044959 Max. :1.0000 Max. :1306.75 Max. :47.000
##
## mining info:
## data ntransactions support confidence
## tr 10454 3e-04 0.6
# First 20 rules that remain after the filtering step
inspect(associationrulesNosubsets[seq_len(20)])
## lhs rhs support confidence lift count
## [1] {PRY0006} => {PRY0003} 0.0003826287 1.0000000 614.94118 4
## [2] {POL0007} => {POL0010} 0.0003826287 1.0000000 1161.55556 4
## [3] {REP0233} => {REP0232} 0.0003826287 0.8000000 1194.74286 4
## [4] {SNS0021} => {SNS0014} 0.0003826287 0.8000000 309.74815 4
## [5] {APP2158} => {APP1215} 0.0003826287 1.0000000 73.10490 4
## [6] {IFX0047} => {IFX0036} 0.0003826287 1.0000000 197.24528 4
## [7] {OWC0056} => {OWC0054} 0.0005739430 1.0000000 871.16667 6
## [8] {OWC0058} => {OWC0057} 0.0003826287 0.8000000 836.32000 4
## [9] {POL0008} => {POL0010} 0.0004782858 1.0000000 1161.55556 5
## [10] {AKI0015} => {APP1913} 0.0003826287 0.8000000 66.37460 4
## [11] {APP2111} => {APP1215} 0.0003826287 0.8000000 58.48392 4
## [12] {APP1495} => {APP1215} 0.0003826287 1.0000000 73.10490 4
## [13] {LAC0235} => {APP1913} 0.0003826287 0.6666667 55.31217 4
## [14] {LAC0238} => {APP1913} 0.0004782858 0.6250000 51.85516 5
## [15] {PRY0004} => {PRY0003} 0.0005739430 1.0000000 614.94118 6
## [16] {NTE0067} => {NTE0068} 0.0003826287 0.8000000 1194.74286 4
## [17] {PAC2111} => {CAD0005} 0.0003826287 0.6666667 165.93651 4
## [18] {NEA0012} => {NEA0008} 0.0003826287 1.0000000 1306.75000 4
## [19] {SNS0010} => {SNS0019} 0.0004782858 0.7142857 466.69643 5
## [20] {PAC2115} => {CAD0005} 0.0005739430 0.7500000 186.67857 6
# Check the remaining rules for redundancy: is.redundant() returns one logical
# per rule; the recorded output below is FALSE for all 48, i.e. no redundant
# rules are left.
is.redundant(associationrulesNosubsets)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE
#Visualizations
# Default plot (scatter plot) of the first 20 remaining rules.
# The previous control = list(type = "items") argument was removed: the plot
# rejected it with "Unknown control parameters: type", so it had no effect
# other than the warning.
plot(associationrulesNosubsets[1:20])
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Graph plot of the first 20 remaining rules.
# The previous control = list(type = "items") argument was removed: the graph
# method rejected it with "Unknown control parameters: type", so it had no
# effect other than the warning.
plot(associationrulesNosubsets[1:20], method = "graph")

# Two-key plot of all remaining rules (plot method "two-key plot").
plot(associationrulesNosubsets,method="two-key plot")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Interactive plot of the first 20 remaining rules.
# plotly_arules() is deprecated in arulesViz (its warning says "Use 'plot'
# instead"), so the plotly engine of plot() is used here.
plot(associationrulesNosubsets[1:20], engine = "plotly")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Filtering rules: keep those whose confidence is above 0.4, then plot them
subRules <- subset(associationrulesNosubsets, subset = confidence > 0.4)
plot(subRules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Ten rules with the highest confidence, drawn as an interactive graph
top10subRules <- head(
  subRules,
  n = 10,
  by = "confidence",
  decreasing = TRUE
)
plot(
  top10subRules,
  method = "graph",
  engine = "htmlwidget"
)